#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialFullConvolutionMap.c"
#else

/*
 * Forward pass of a full (transposed) 2D convolution driven by a connection
 * table: each row k of connTable is an (inputPlane, outputPlane) pair (values
 * offset by TH_INDEX_BASE) selecting which kH x kW weight slice k connects.
 * Output is resized to nOutputPlane x ((inH-1)*dH + kH) x ((inW-1)*dW + kW),
 * seeded with the bias (or zero when bias is NULL), then each connected input
 * plane is accumulated via THTensor_(fullConv2Dptr).
 */
void THNN_(SpatialFullConvolutionMap_updateOutput)(
  THNNState *state, THTensor *input, THTensor *output_, THTensor *weight, THTensor *bias,
  THTensor *connTable, int nInputPlane, int nOutputPlane,
  int dW, int dH)
{
  THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");
  THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous");
  /* weight must hold one kH x kW slice per connection-table row */
  THArgCheck(
    weight != NULL && !weight->is_empty() && weight->dim() == 3
    && connTable != NULL && connTable->size(0) == weight->size(0), 4,
    "non-empty 3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
  );

  const int kH = (int)weight->size(1);
  const int kW = (int)weight->size(2);

  THArgCheck(input != NULL && !input->is_empty() && input->dim() == 3, 2, "non-empty 3D tensor expected");
  THArgCheck(input->size(0) >= nInputPlane, 2, "invalid number of input planes");

  /* full-convolution output size: (in - 1) * stride + kernel */
  THTensor_(resize3d)(
    output_, nOutputPlane,
    (input->size(1) - 1) * dH + kH,
    (input->size(2) - 1) * dW + kW
  );

  /* contiguous */
  input = THTensor_(newContiguous)(input);
  THTensor* output = THTensor_(newContiguous)(output_);

  /* get raw pointers; bias is optional (checked above), so guard the deref */
  scalar_t *input_data = input->data<scalar_t>();
  scalar_t *output_data = output->data<scalar_t>();
  scalar_t *weight_data = weight->data<scalar_t>();
  scalar_t *bias_data = bias ? bias->data<scalar_t>() : NULL;
  scalar_t *connTable_data = connTable->data<scalar_t>();

  /* and dims */
  const int64_t input_h = input->size(1);
  const int64_t input_w = input->size(2);
  const int64_t output_h = output->size(1);
  const int64_t output_w = output->size(2);
  const int64_t weight_h = weight->size(1);
  const int64_t weight_w = weight->size(2);

  int64_t p;
#pragma omp parallel for private(p)
  for (p = 0; p < nOutputPlane; p++)
  {
    /* seed the plane with its bias (zero when no bias was given) */
    scalar_t *ptr_output = output_data + p*output_w*output_h;
    const scalar_t init = bias_data ? bias_data[p] : (scalar_t)0;
    int64_t j;
    int nweight;
    int64_t k;

    for (j = 0; j < output_h*output_w; j++)
      ptr_output[j] = init;

    /* convolve all maps connected to output plane p */
    nweight = connTable->size(0);
    for (k = 0; k < nweight; k++)
    {
      /* get offsets for input/output (connTable entries are TH_INDEX_BASE-based) */
      int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
      int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;

      if (o == p)
      {
        THTensor_(fullConv2Dptr)(
          output_data + o*output_w*output_h,
          1.0,
          input_data + i*input_w*input_h, input_h, input_w,
          weight_data + k*weight_w*weight_h, weight_h, weight_w,
          dH, dW
        );
      }
    }
  }

  /* clean up */
  c10::raw::intrusive_ptr::decref(input);
  THTensor_(freeCopyTo)(output, output_);
}

/*
 * Backward pass w.r.t. the input for the connection-table full convolution:
 * gradInput is resized to input's shape, zeroed, and for every table row
 * whose input plane matches, the gradient of the connected output plane is
 * accumulated through THTensor_(validXCorr2Dptr) with the row's weight slice.
 */
void THNN_(SpatialFullConvolutionMap_updateGradInput)(
  THNNState *state, THTensor *input, THTensor *gradOutput, THTensor *gradInput_, THTensor *weight, THTensor *bias,
  THTensor *connTable, int nInputPlane, int nOutputPlane,
  int dW, int dH)
{
  /* weight must hold one kH x kW slice per connection-table row */
  THArgCheck(
    weight != NULL && !weight->is_empty() && weight->dim() == 3
    && connTable != NULL && connTable->size(0) == weight->size(0), 5,
    "non-empty 3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
  );

  /* work on contiguous buffers */
  THTensor *gradInput = THTensor_(newContiguous)(gradInput_);
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* gradInput takes input's shape and starts from zero */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  /* raw data pointers */
  scalar_t *gradInput_data = gradInput->data<scalar_t>();
  scalar_t *gradOutput_data = gradOutput->data<scalar_t>();
  scalar_t *weight_data = weight->data<scalar_t>();
  scalar_t *connTable_data = connTable->data<scalar_t>();

  /* geometry */
  const int64_t in_h  = input->size(1);
  const int64_t in_w  = input->size(2);
  const int64_t out_h = gradOutput->size(1);
  const int64_t out_w = gradOutput->size(2);
  const int64_t ker_h = weight->size(1);
  const int64_t ker_w = weight->size(2);

  /* each thread owns exactly one input plane, so writes never collide */
  int64_t plane;
#pragma omp parallel for private(plane)
  for (plane = 0; plane < nInputPlane; plane++)
  {
    const int nconn = connTable->size(0);
    int64_t c;
    for (c = 0; c < nconn; c++)
    {
      /* connTable entries are TH_INDEX_BASE-based (inputPlane, outputPlane) */
      const int dst = (int)connTable_data[c*2+1] - TH_INDEX_BASE;
      const int src = (int)connTable_data[c*2+0] - TH_INDEX_BASE;
      if (src != plane)
        continue;

      /* accumulate this connection's contribution into the input plane */
      THTensor_(validXCorr2Dptr)(
        gradInput_data + src*in_w*in_h,
        1.0,
        gradOutput_data + dst*out_w*out_h,  out_h,  out_w,
        weight_data + c*ker_w*ker_h, ker_h, ker_w,
        dH, dW
      );
    }
  }

  /* write the result back into gradInput_ and release temporaries */
  THTensor_(freeCopyTo)(gradInput, gradInput_);
  c10::raw::intrusive_ptr::decref(gradOutput);
}

/*
 * Parameter gradients for the connection-table full convolution:
 *   - gradBias[k]  accumulates scale * sum(gradOutput plane k)
 *   - gradWeight's k-th slice accumulates, via THTensor_(validXCorr2DRevptr),
 *     the correlation of the k-th connection's gradOutput and input planes.
 * connTable is read through THTensor_(get2d) here (it is not made contiguous
 * in this function).
 */
void THNN_(SpatialFullConvolutionMap_accGradParameters)(
  THNNState *state,
  THTensor *input,
  THTensor *gradOutput,
  THTensor *gradWeight,
  THTensor *gradBias,
  THTensor *connTable,
  int nInputPlane,
  int nOutputPlane,
  int dW, int dH,
  accreal scale_)
{
  scalar_t scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
  /* gradWeight must hold one kH x kW slice per connection-table row */
  THArgCheck(
    gradWeight != NULL && !gradWeight->is_empty() && gradWeight->dim() == 3
    && connTable != NULL && connTable->size(0) == gradWeight->size(0), 5,
    "non-empty 3D gradWeight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
  );

  /* work on contiguous buffers */
  input = THTensor_(newContiguous)(input);
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* raw data pointers */
  scalar_t *input_data = input->data<scalar_t>();
  scalar_t *gradOutput_data = gradOutput->data<scalar_t>();
  scalar_t *gradWeight_data = gradWeight->data<scalar_t>();
  scalar_t *gradBias_data = gradBias->data<scalar_t>();

  /* geometry */
  const int64_t input_h  = input->size(1);
  const int64_t input_w  = input->size(2);
  const int64_t output_h = gradOutput->size(1);
  const int64_t output_w = gradOutput->size(2);
  const int64_t weight_h = gradWeight->size(1);
  const int64_t weight_w = gradWeight->size(2);

  /* bias gradient: each thread owns one output plane */
  int64_t plane;
#pragma omp parallel for private(plane)
  for (plane = 0; plane < nOutputPlane; plane++)
  {
    scalar_t *go = gradOutput_data + plane*output_w*output_h;
    int64_t idx;
    for (idx = 0; idx < output_h*output_w; idx++)
      gradBias_data[plane] += scale*go[idx];
  }

  /* weight gradient: each thread owns one connection's kernel slice */
  int nconn = connTable->size(0);
  int64_t c;
#pragma omp parallel for private(c)
  for (c = 0; c < nconn; c++)
  {
    /* connTable entries are TH_INDEX_BASE-based (inputPlane, outputPlane) */
    int o = (int)THTensor_(get2d)(connTable,c,1) - TH_INDEX_BASE;
    int i = (int)THTensor_(get2d)(connTable,c,0) - TH_INDEX_BASE;

    /* accumulate the kernel gradient for this connection */
    THTensor_(validXCorr2DRevptr)(
      gradWeight_data + c*weight_w*weight_h,
      scale,
      gradOutput_data + o*output_w*output_h, output_h, output_w,
      input_data + i*input_w*input_h, input_h, input_w,
      dH, dW
    );
  }

  /* clean up */
  c10::raw::intrusive_ptr::decref(input);
  c10::raw::intrusive_ptr::decref(gradOutput);
}

#endif
